\subsection{Conditional expectation and law of total expectation}
Recall that if \(B \in \mathcal F\) with \(\prob{B} > 0\), we defined
\[
	\prob{A \mid B} = \frac{\prob{A \cap B}}{\prob{B}}
\]
Now, let \(X\) be a random variable, and let \(B\) be an event as above with nonzero probability.
We can then define
\[
	\expect{X \mid B} = \frac{\expect{X \cdot 1(B)}}{\prob{B}}
\]
The random variable \(X \cdot 1(B)\) in the numerator is zero whenever \(1(B) = 0\), so in essence we are excluding the outcomes that do not lie in \(B\).
\begin{theorem}[law of total expectation]
	Suppose \(X \geq 0\).
	Let \((\Omega_n)\) be a partition of \(\Omega\) into disjoint events, so \(\Omega = \bigcup_n \Omega_n\).
	Then
	\[
		\expect{X} = \sum_n \expect{X \mid \Omega_n} \cdot \prob{\Omega_n}
	\]
\end{theorem}
\begin{proof}
	We can write \(X = X \cdot 1(\Omega)\), where
	\[
		1(\Omega) = \sum_n 1(\Omega_n)
	\]
	Taking expectations, we get
	\[
		\expect{X} = \expect{ \sum_{n} X \cdot 1(\Omega_n) }
	\]
	By countable additivity of expectation, we have
	\[
		\expect{X} = \sum_{n} \expect{ X \cdot 1(\Omega_n) } = \sum_n \expect{X \mid \Omega_n} \cdot \prob{\Omega_n}
	\]
	as required.
\end{proof}

\subsection{Joint distribution}
\begin{definition}
	Let \(X_1, \dots, X_n\) be discrete random variables.
	Their joint distribution is defined as
	\[
		\prob{X_1 = x_1, \dots, X_n = x_n}
	\]
	for all \(x_i \in \Omega_i\).
\end{definition}
Now, we have
\[
	\prob{X_1 = x_1} = \prob{\{ X_1 = x_1\} \cap \bigcap_{i=2}^n \bigcup_{x_i} \{ X_i = x_i \} } = \sum_{x_2, \dots, x_n} \prob{X_1 = x_1, X_2 = x_2, \dots, X_n = x_n}
\]
In general,
\[
	\prob{X_i = x_i} = \sum_{x_1, x_2, \dots, x_{i-1}, x_{i+1}, \dots, x_n} \prob{X_1 = x_1, X_2 = x_2, \dots, X_n = x_n}
\]
We call \((\prob{X_i = x_i})_{x_i}\) the marginal distribution of \(X_i\).
Let \(X, Y\) be random variables.
The conditional distribution of \(X\) given \(Y = y\) where \(y \in \Omega_y\) is defined to be
\[
	\prob{X = x \mid Y = y} = \frac{\prob{X = x, Y = y}}{\prob{Y = y}}
\]
We can find
\[
	\prob{X = x} = \sum_y \prob{X = x, Y = y} = \sum_y \prob{X = x \mid Y = y} \prob{Y = y}
\]
which is the law of total probability.

\subsection{Convolution}
Let \(X\) and \(Y\) be independent, discrete random variables.
We would like to find \(\prob{X + Y = z}\).
Clearly this is
\begin{align*}
	\prob{X + Y = z} & = \sum_y \prob{X + Y = z, Y = y}           \\
	                 & = \sum_y \prob{X = z-y, Y = y}             \\
	                 & = \sum_y \prob{X = z-y} \cdot \prob{Y = y} \\
\end{align*}
This last sum is called the convolution of the distributions of \(X\) and \(Y\).
Similarly,
\[
	\prob{X + Y = z} = \sum_x \prob{X = x} \cdot \prob{Y = z-x}
\]
As an example, let \(X \sim \mathrm{Poi}(\lambda)\) and \(Y \sim \mathrm{Poi}(\mu)\) be independent.
Then
\begin{align*}
	\prob{X + Y = n} & = \sum_{r = 0}^n \prob{X = r} \prob{Y = n - r}                                              \\
	                 & = \sum_{r = 0}^n e^{-\lambda} \frac{\lambda^r}{r!} \cdot e^{-\mu} \frac{\mu^{n-r}}{(n-r)!}  \\
	                 & = e^{-(\lambda+\mu)} \sum_{r = 0}^n \frac{\lambda^r\mu^{n-r}}{r!(n-r)!}                     \\
	                 & = \frac{e^{-(\lambda+\mu)}}{n!} \sum_{r = 0}^n \frac{\lambda^r\mu^{n-r} \cdot n!}{r!(n-r)!} \\
	                 & = \frac{e^{-(\lambda+\mu)}}{n!} \sum_{r = 0}^n \binom{n}{r} \lambda^r\mu^{n-r}              \\
	                 & = \frac{e^{-(\lambda+\mu)}}{n!} (\lambda + \mu)^n                                           \\
\end{align*}
which is the probability mass function of a Poisson random variable with parameter \(\lambda + \mu\).
In other words, \(X + Y \sim \mathrm{Poi}(\lambda + \mu)\).

\subsection{Conditional expectation}
Let \(X\) and \(Y\) be discrete random variables.
Then the conditional expectation of \(X\) given that \(Y = y\) is
\begin{align*}
	\expect{X \mid Y = y} &= \frac{\expect{X \cdot 1(Y = y)}}{\prob{Y = y}} \\
	&= \frac{1}{\prob{Y = y}} \sum_x x \cdot \prob{X = x, Y = y} \\
	&= \sum_x x \cdot \prob{X = x \mid Y = y}
\end{align*}
Observe that for every \(y \in \Omega_y\), this expectation is purely a function of \(y\).
Let \(g(y) = \expect{X \mid Y = y}\).
Now, we define the conditional expectation of \(X\) given \(Y\) as \(\expect{X \mid Y} = g(Y)\).
Note that \(\expect{X \mid Y}\) is a random variable, dependent only on \(Y\).
We have
\begin{align*}
	\expect{X \mid Y} & = g(Y) \cdot 1                                \\
	                  & = g(Y) \sum_y 1(Y = y)                        \\
	                  & = \sum_y g(Y) \cdot 1(Y = y)                  \\
	                  & = \sum_y g(y) \cdot 1(Y = y)                  \\
	                  & = \sum_y \expect{X \mid Y = y} \cdot 1(Y = y)
\end{align*}
This is perhaps a clearer way to see that it depends only on \(Y\).
As an example, let us consider tossing a \(p\)-biased coin \(n\) times independently.
We write \(X_i\) for the indicator function that the \(i\)th toss was a head.
Let \(Y_n = X_1 + \dots + X_n\).
What is \(\expect{X_1 \mid Y_n}\)?
Let \(g(y) = \expect{X_1 \mid Y_n = y}\).
Then \(\expect{X_1 \mid Y_n} = g(Y_n)\).
We therefore need to find \(g\).
Let \(y \in \{ 0, \dots, n \}\), then
\begin{align*}
	g(y) & = \expect{X_1 \mid Y_n = y}   \\
	     & = \prob{X_1 = 1 \mid Y_n = y} \\
\end{align*}
Clearly if \(y = 0\), then \(\prob{X_1 = 1 \mid Y_n = 0} = 0\).
Now, suppose \(y \neq 0\).
We have
\begin{align*}
	\prob{X_1 = 1 \mid Y_n = y} & = \frac{\prob{X_1 = 1, Y_n = y}}{\prob{Y_n = y}}                             \\
	                            & = \frac{\prob{X_1 = 1, X_2 + \dots + X_n = y-1}}{\prob{Y_n = y}}             \\
	                            & = \frac{\prob{X_1 = 1} \cdot \prob{X_2 + \dots + X_n = y-1}}{\prob{Y_n = y}} \\
	                            & = \frac{p \cdot \binom{n-1}{y-1} \cdot p^{y-1}(1-p)^{n-y}}{\prob{Y_n = y}}   \\
	                            & = \frac{\binom{n-1}{y-1} \cdot p^y(1-p)^{n-y}}{\binom{n}{y}p^y (1-p)^{n-y}}  \\
	                            & = \frac{\binom{n-1}{y-1}}{\binom{n}{y}}                                      \\
	                            & = \frac{y}{n}
\end{align*}
Hence
\[
	g(y) = \frac{y}{n}
\]
We can then find that
\[
	\expect{X_1 \mid Y_n} = g(Y_n) = \frac{Y_n}{n}
\]
which is indeed a random variable dependent only on \(Y_n\).

\subsection{Properties of conditional expectation}
The following properties hold.
\begin{itemize}
	\item For all \(c \in \mathbb R\), \(\expect{cX \mid Y} = c\expect{X \mid Y}\), and \(\expect{c \mid Y} = c\).
	\item Let \(X_1, \dots, X_n\) be random variables.
	      Then \(\expect{\sum_{i=1}^n X_i \mid Y} = \sum_{i=1}^n \expect{X_i \mid Y}\).
	\item \(\expect{\expect{X \mid Y}} = \expect{X}\).
\end{itemize}
The last property is not obvious from the definition, so it warrants its own proof.
We can see by the standard properties of the expectation that
\begin{align*}
	\expect{X \mid Y}                     & = \sum_y 1(Y = y) \expect{X \mid Y = y}                              \\
	\therefore\ \expect{\expect{X \mid Y}} & = \sum_y \expect{1(Y = y)} \expect{X \mid Y = y}                     \\
	                                      & = \sum_y \prob{Y = y} \expect{X \mid Y = y}                          \\
	                                      & = \sum_y \prob{Y = y} \frac{\expect{X \cdot 1(Y = y)}}{\prob{Y = y}} \\
	                                      & = \sum_y \expect{X \cdot 1(Y = y)}                                   \\
	                                      & = \expect{\sum_y X \cdot 1(Y = y)}                                   \\
	                                      & = \expect{X \sum_y 1(Y = y)}                                         \\
	                                      & = \expect{X}                                                         \\
\end{align*}
Alternatively, we could expand the inner expectation as a sum:
\[
	\sum_y \expect{X \mid Y = y} \cdot \prob{Y = y} = \sum_x \sum_y x \cdot \prob{X = x \mid Y = y} \cdot \prob{Y = y}
\]
and the result follows as required.
The final property relates conditional expectation to independence.
Let \(X\) and \(Y\) be independent.
Then \(\expect{X \mid Y} = \expect{X}\).
Indeed,
\begin{align*}
	\expect{X \mid Y} & = \sum_y 1(Y = y) \expect{X \mid Y = y} \\
	                  & = \sum_y 1(Y = y) \expect{X}            \\
	                  & = \expect{X}                            \\
\end{align*}

\begin{proposition}
	Suppose \(Y\) and \(Z\) are independent random variables.
	Then
	\[
		\expect{\expect{X \mid Y} \mid Z} = \expect{X}
	\]
\end{proposition}
\begin{proof}
	Let \(\expect{X \mid Y} = g(Y)\) be a random variable that is a function only of \(Y\).
	Since \(Y\) and \(Z\) are independent, \(f(Y)\) is also independent of \(Z\) for any function \(f\).
	Then \(\expect{g(Y) \mid Z} = \expect{g(Y)} = \expect{X}\).
\end{proof}
\begin{proposition}
	Suppose \(h \colon \mathbb R \to \mathbb R\) is a function.
	Then
	\[
		\expect{h(Y) \cdot X \mid Y} = h(Y) \cdot \expect{X \mid Y}
	\]
	We can `take out what is known', since we know what \(Y\) is.
\end{proposition}
\begin{proof}
	Note that
	\[
		\expect{h(Y) \cdot X \mid Y = y} = \expect{h(y) \cdot X \mid Y = y} = h(y) \cdot \expect{X \mid Y = y}
	\]
	Then
	\[
		\expect{h(Y) \cdot X \mid Y} = h(Y) \cdot \expect{X \mid Y}
	\]
	as required.
\end{proof}
\begin{corollary}
	\(\expect{\expect{X \mid Y} \mid Y} = \expect{X \mid Y}\), and \(\expect{X \mid X} = X\).
\end{corollary}
Let \(X_i = 1(i\text{th toss is a head})\), and \(Y_n = X_1 + \dots + X_n\).
We found before that \(\expect{X_1 \mid Y_n} = \frac{Y_n}{n}\).
By symmetry, for all \(i\) we have \(\expect{X_i \mid Y_n} = \expect{X_1 \mid Y_n}\).
Hence
\[
	\expect{Y_n \mid Y_n} = \expect{\sum_{i=1}^n X_i \mid Y_n} = \sum_{i=1}^n \expect{X_i \mid Y_n} = n \cdot \expect{X_1 \mid Y_n}
\]
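and since the left-hand side equals \(Y_n\) by the corollary, this yields the same result, \(\expect{X_1 \mid Y_n} = \frac{Y_n}{n}\).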
